package spring.boot.seimi.crawler;

import cn.wanghaomiao.seimi.annotation.Crawler;
import cn.wanghaomiao.seimi.def.BaseSeimiCrawler;
import cn.wanghaomiao.seimi.struct.Request;
import cn.wanghaomiao.seimi.struct.Response;
import org.seimicrawler.xpath.JXDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * Crawler registered under the name {@code "basic"}.
 *
 * <p>It starts from a single jianshu.com collection page, extracts the article links found
 * there, and pushes one follow-up request per link whose callback ({@link #getTitle(Response)})
 * logs the article's URL, title and body text.
 */
@Crawler(name = "basic")
public class Basic extends BaseSeimiCrawler {

	private static final Logger logger = LoggerFactory.getLogger(Basic.class);

	/** Site origin; the hrefs selected in {@link #start(Response)} are appended to it verbatim. */
	private static final String BASE_URL = "https://www.jianshu.com";

	/**
	 * Supplies the entry-point URLs for this crawler.
	 *
	 * @return a one-element array containing the collection page URL
	 */
	@Override
	public String[] startUrls() {
		return new String[]{BASE_URL + "/c/18dd84d66135"};
	}

	/**
	 * Extracts article links from the given response and pushes a request for each of them,
	 * using {@link #getTitle(Response)} as the callback.
	 *
	 * <p>Any exception raised while selecting or pushing is logged and not rethrown.
	 *
	 * @param response the fetched page (presumably one of {@link #startUrls()}; the caller is
	 *                 the framework and is not visible here)
	 */
	@Override
	public void start(Response response) {
		JXDocument doc = response.document();
		try {
			// href attribute of every <a class="title"> element on the page.
			List<Object> urls = doc.sel("//a[@class='title']/@href");
			logger.info("文章数量=[{}]",urls.size());
			for (Object url : urls) {
				// Build the absolute URL once and reuse it for both the log line and the request.
				String articleUrl = BASE_URL + url;
				logger.info("url:[{}]", articleUrl);
				push(Request.build(articleUrl, Basic::getTitle));
			}
		} catch (Exception e) {
			// Route the failure through the logger (with stack trace) instead of stderr.
			logger.error("Failed to extract article links from [{}]", response.getUrl(), e);
		}
	}

	/**
	 * Callback for an article page: logs the page URL, the text of the title heading and the
	 * text of the article body (its h1, p and blockquote/p children).
	 *
	 * <p>Any exception raised while selecting is logged and not rethrown.
	 *
	 * @param response the fetched article page
	 */
	public void getTitle(Response response) {
		JXDocument doc = response.document();
		try {
			logger.info("url:{}", response.getUrl());
			logger.info("title:{}", doc.sel("//h1[@class='_1RuRku']/text()"));
			logger.info("content:{}", doc.sel("//article[@class='_2rhmJa']/h1/text()|//article[@class='_2rhmJa']/p/text()|//article[@class='_2rhmJa']/blockquote/p/text()"));
		} catch (Exception e) {
			// Route the failure through the logger (with stack trace) instead of stderr.
			logger.error("Failed to extract article title/content from [{}]", response.getUrl(), e);
		}
	}
}
